# -*- coding: utf-8 -*-
import os
import re

import pymysql
import scrapy

# Module-level MySQL connection used by the spider in this file.
# Each setting can be overridden through a BYJ_DB_* environment variable;
# the fallbacks are the previously hard-coded values, so an environment
# without those variables connects exactly as before.
# NOTE(review): the fallback password is a secret committed to source
# control -- rotate it and drop the default once BYJ_DB_PASSWORD is
# provisioned everywhere this spider runs.
conn = pymysql.connect(
    host=os.environ.get('BYJ_DB_HOST', 'localhost'),
    port=int(os.environ.get('BYJ_DB_PORT', '3306')),
    user=os.environ.get('BYJ_DB_USER', 'suweif'),
    # `password` / `database` replace the deprecated `passwd` / `db` aliases.
    password=os.environ.get('BYJ_DB_PASSWORD', 'i8neJfFTD7JXhra8'),
    database=os.environ.get('BYJ_DB_NAME', 'suweif'),
    use_unicode=True,
    charset="utf8",
    # Rows come back as dicts keyed by column name.
    cursorclass=pymysql.cursors.DictCursor,
)

class ByjSpider(scrapy.Spider):
    """Spider for the YinGuo index page of www.fosss.org.

    For every table cell under ``#mainContent`` it reads the link target,
    the link text, an order number parsed from the link's file name
    (``.../byj<N>.html``) and an optional sub-title.
    """

    name = 'Byj'
    allowed_domains = ['www.fosss.org']
    start_urls = ['http://www.fosss.org/YinGuo/Index.html']

    def parse(self, response):
        """Walk the index table and extract link, order and sub-title per cell.

        Args:
            response: the downloaded index page handed over by Scrapy.
        """
        # NOTE(review): browsers often insert <tbody> themselves; confirm the
        # raw HTML served by the site really contains it, otherwise this
        # selects nothing.
        cells = response.xpath('//*[@id="mainContent"]/table/tbody/tr/td')

        for cell in cells:
            href = cell.css('a::attr(href)')
            # re.match() needs a string, not a SelectorList; .get() returns
            # the first matched href, or None when the cell has no link.
            href_text = href.get()

            order_re = None
            if href_text is not None:
                order_re = re.match(r'.+[/]byj([0-9]+)[.]html', href_text)

            # Default to 1 when the link does not follow the byj<N>.html
            # pattern; keep the value an int in both branches.
            order = 1
            if order_re:
                order = int(order_re.group(1))

            link_text = cell.xpath('.//a/text()')
            sub_title = cell.xpath('.//p/text()')

            sub_title_text = ""

            print(link_text.get())
            print(href_text)
            # The sub-title is taken from the second <p> text node, if any.
            if len(sub_title) > 1:
                sub_title_text = sub_title[1].get().strip()

            # NOTE(review): a new cursor is opened for every cell and is
            # neither used nor closed in the code visible here.
            cursor = conn.cursor(cursor=pymysql.cursors.DictCursor)

